* follows:
* read_lights_raster.do (remote sensed droughts and lights), 
* read_ascii_cru.do (reads CRU sc-PDSI, temp and precip)
* read_other_rasters.do (to read in cross-sectional rasters) 
* read_pfaf4char.do (creates pfaf4_char.dta) --- dams and other pfaf4 characteristics

* preceeds reg_main.do, reg_mfx.do, reg_appendix.do, reg_iv.do
* data starts at .05 degree level and aggregated to .5 degree

log using merge_grid, replace
timer on 1

use "../data/grid_lights_drought.dta", clear
sort x y year
drop if missing(lights)

* merge cross-sectional data that's at .05 degree cell level
merge m:1 x y using "../data/flares.dta"
drop if _merge==2
gen flare=(detection_frequency_2012>0) & _merge==3
drop _merge
summarize lights if flare==1, detail

merge m:1 x y using "../data/urban.dta"
drop if _merge==2
drop _merge


*get pfaf4 codes, 
merge m:1 x y using  "../data/pfaf4_grid.dta"
summarize x y if _merge==2, detail

sort conpfaf4 year

preserve

* aggregate to pfaf4 levels as needed for upstream drought 
drop if flare
collapse (mean) lightspfaf4=lights droughtpfaf4=drought (count) cellsinpfaf4=lights, by(conpfaf4 year)

label var lightspfaf4 "Average lights by pfaf4 & year"
label var droughtpfaf4 "Average drought by pfaf4 & year"
label var cellsinpfaf4 "Num .05 degree cells in pfaf4"

label data "Drought and lights by pfaf4 year" 

tempfile pfaf4_drought

save `pfaf4_drought'


*create information on lights by pfaf4 so can exclude unpopulated places
collapse (min) minlights=lightspfaf4, by(conpfaf4)
 
label var minlights "Min (over time) lights for subbasin" 


save minlights, replace
restore

* pick Pfaf4 that is most common in .5 degree grid cell

gen x_10=floor((x-1)/10) + 1
gen y_10=floor((y-1)/10) + 1 

bys x_10 y_10: egen conpfaf4mode=mode(conpfaf4), minmode 

*now collapse to .5 degree
collapse (mean) lights drought ihslights urbanshare (sum) flare urban (firstnm) conpfaf4mode, by(x_10 y_10 year)


rename x_10 x
rename y_10 y
rename conpfaf4mode conpfaf4
replace urban=(urban>0)


label var flare "Flare in .5 deg cell in 2012"
label var lights "Light index"
label var drought "Remote-sensed DSI"
label var conpfaf4 "Pfaf4, unique by continent"
label var urban "Some urban area in cell"
label var urbanshare "Urban share"

sort x y year


* add temp and precip data at half degree level
merge 1:1 x y year using "../data/annual_av_tmp.dta"
drop if _merge==2
drop _merge

merge 1:1 x y year using "../data/annual_av_pre.dta"
drop if _merge==2
drop _merge


sort conpfaf4

merge m:1 conpfaf4 using "../data/pfaf4_char.dta"

drop if _merge==2

drop _merge


merge m:1 conpfaf4 using minlights
drop if _merge==2
drop _merge

drop if minlights==0 | missing(minlights)

sort x y year

*11 bins
generate dsi_cat_temp=recode(drought,-1.5,-1.2,-.9,-.6,-.3,.3,.6,.9,1.2,1.5,4) if !missing(drought)
egen byte dsi_cat11=group(dsi_cat_temp) if !missing(drought)
drop dsi_cat_temp
*7 bins
gen dsi_cat7=dsi_cat11
replace dsi_cat7=7 if dsi_cat11>=7

** binary
gen extreme=(dsi_cat11==1) if !missing(dsi_cat11)
gen extsev=(dsi_cat11<=2) if !missing(dsi_cat11)
gen byte esmdsi=(dsi_cat11<=3) if !missing(dsi_cat11)



label define cat11 1 "Extreme drought"  2 "Severe drought" 3 "Moderate drought" 4 "Mild drought" ///
5 "Incipient drought" 6 "Near normal" 7 "Incipient wet spell"  8 "Slightly wet" 9 "Moderately wet" ///
10 "Very wet"  11 "Extremely wet"
label values dsi_cat11 cat11
label define cat7 1 "Extreme drought"  2 "Severe drought" 3 "Moderate drought" 4 "Mild drought" ///
5 "Incipient drought" 6 "Near normal" 7 "Wet"  
label values dsi_cat7 cat7
label var dsi_cat11 "DSI, 11 categories"
label var dsi_cat7 "DSI, 7 categories"
label var extsev "Extreme or severe drought (DSI)"
label var esmdsi "Moderate or worse drought (DSI)"

sort y x year

** merge sc-PDSI data, also only at half degree level
merge 1:1 y x year using "../data/pdsi.dta", sorted
drop if _merge==2
drop _merge

*using definitions in van der Schrier article
generate pdsi_cat_temp=recode(pdsi_av,-4,-3,-2,-1,-.5,.5,1,2,3,4,30) if !missing(pdsi_av)
gen pdsi_summer_cat_temp=recode(pdsi_summer,-4,-3,-2,-1,-.5,.5,1,2,3,4,30) if !missing(pdsi_summer)
gen pdsi_min_cat_temp=recode(pdsi_min,-4,-3,-2,-1,-.5,.5,1,2,3,4,30) if !missing(pdsi_min)

*matching distribution of cut points in remote sensed DSI
*_pctile pdsi_av, percentiles(4, 8, 15, 25, 36, 63, 75, 85, 92, 97)
*generate pdsi_cat_temp=recode(pdsi_av,`r(r1)',`r(r2)',`r(r3)',`r(r4)',`r(r5)',`r(r6)',`r(r7)',`r(r8)',`r(r9)',`r(r10)',30)
egen byte pdsi_cat=group(pdsi_cat_temp)
egen byte pdsi_summer_cat=group(pdsi_summer_cat_temp)
egen byte pdsi_min_cat=group(pdsi_min_cat_temp)

drop pdsi_cat_*temp 

label var pdsi_cat "sc-PDSI categories"
label var pdsi_summer_cat "sc-PDSI categories in June or Dec"
label var pdsi_min_cat "sc-PDSI categories at annual min"

label define pcat 1 "Extremely dry"  2 "Severely dry" 3 "Moderately dry" 4 "Mildly dry" ///
5 "Incipient drought" 6 "Near normal" 7 "Incipient wet spell"  8 "Slightly wet" 9 "Moderately wet" ///
10 "Very wet"  11 "Extremely wet"
label values pdsi_cat pcat
label values pdsi_summer_cat pcat
label values pdsi_min_cat pcat



gen byte extpdsi=(pdsi_cat==1) if !missing(pdsi_cat)
gen byte extsevpdsi=(pdsi_cat<=2) if !missing(pdsi_cat)
gen byte esmpdsi=(pdsi_cat<=3) if !missing(pdsi_cat)

label variable extpdsi "Extreme drought (scPDSI)" 
label variable extsevpdsi "Extreme or severe drought (scPDSI)" 
label variable esmpdsi "Moderate or worse drought (scPDSI)"


*merge on half degree grids
* read in read_other_raster.do

merge m:1 y x using "../data/cropland2000.dta"
drop if _merge==2
drop _merge

merge m:1 y x using "../data/irrigation.dta"
drop if _merge==2
drop _merge

* use WB definition of center of cells
gen lon=-179.75+.5*(x-1)
gen lat=74.75-.5*(y-1)

label var lon "Logitude (midpoint)"
label var lat "Latitude (midpoint)"

sort lon lat

*WB groundater grid
merge m:1 lon lat using "../data/aqtyp_gwresource_grid05deg.dta"

drop if _merge==2
drop _merge


label var grid_id "WB cell ID"
label var aqtyp_max "Aquifer typology class with highest % grid cell (excluding NA)"
label var aqtyp_pct_ma "Major alluvial (%)" 
label var aqtyp_pct_cx "Complex aquifer (%)" 
label var aqtyp_pct_kt  "Karstic (%)"
label var aqtyp_pct_ls "Local/shallow aquifer (%)"
label var aqtyp_pct_NA "No aquifer data (%)" 
label var resource_pct_grid_na "Not covered by country-lithological-outcrop resource data (% of grid cell)"
label var resource "GW resource, sum w/in cell (10^9 m3/yr)" 
label var resource_norm "GW resource, normalized (10^9 m3/yr)"

drop grid* pfaf4





*** for upstream drought analysis, get basin level drought,

sort conpfaf4 year

merge m:1 conpfaf4 year using `pfaf4_drought'
drop if _merge==2
drop lightspfaf4 cellsinpfaf4 _merge
rename droughtpfaf4 d4temp

tempfile all
save `all'


** add upstream drought information
use "../data/upstream_basins.dta", clear
rename conpfaf4 start_basin
rename up conpfaf4

sort conpfaf4

*merge pfaf4-drought data created above
*want all years and all upstream basins
joinby conpfaf4 using `pfaf4_drought'

collapse (mean) upstream_drought=droughtpfaf4, by(start_basin year)

rename start_basin conpfaf4

label var upstream_drought "Average drought for all upstream subbasins"

sort conpfaf4 year

merge 1:m conpfaf4 year using `all'
drop if _merge==1

drop _merge
rename d4temp droughtpfaf4

compress

save for_reg, replace

timer off 1
timer list 1
log close

timer clear 1



